#pca plots
PCAplot(pca.ctr, "Line", Shape = "COiMg", PoV.df=PoV.ctr, pc.1 = 1, pc.2 = 2) #M30 & M33 are outleirs, let's remove them for nowPCAplot(pca.ctr, "condition", Shape = "Line", PoV.df=PoV.ctr, pc.1 = 1, pc.2 = 2) #M30 & M33 are outleirs, let's remove them for nowPCAplot(pca.ctr, "COiMg", Shape = "Line", PoV.df=PoV.ctr, pc.1 = 1, pc.2 = 2) #M30 & M33 are outleirs, let's remove them for nowvolcano_plot(data.frame(new_coimg), title = 'CO vs COiMg',
annotate_by=top40.ctr, ymax1 = 30, ymax2 = 31, xmax1 = -5, xmax2 = 8)ggplot(expression_data_long,aes(x = Gene, y = Expression, fill = COiMG)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.7) +
theme_minimal()ComplexHeatmap::Heatmap(hm_counts.ctr,
cluster_rows = T,
cluster_columns = T,
show_row_dend = T,
show_column_dend = T,
show_row_names = F,
show_column_names = F,
left_annotation = ra.ctr,
bottom_annotation = HeatmapAnnotation(
text = anno_text(colnames(hm_counts.ctr), rot = 300, just = "left",gp=gpar(fontsize=8))),
col = colorRampPalette(rev(RColorBrewer::brewer.pal(9,"RdBu")))(10),
name = "Z-score expression") ## Enrichment heatmap (gene sets of interest)
ComplexHeatmap::Heatmap(DEG.enrich_coimg,
cluster_rows = F,
cluster_columns = F,
show_row_dend = F,
show_column_dend = F,
show_row_names = T,
col = colorRampPalette(RColorBrewer::brewer.pal(9,"Blues"))(30),
name = "log2(q)",
)dotplot(new_coimg.upregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")dotplot(new_coimg.downregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")ctr_CO <- batch.rem3_mod1[,colnames(batch.rem3_mod1) %in% rownames(meta[meta$COiMg %in% "no" & meta$condition %in% "IFNg",])]
ctr_COiMG <- batch.rem3_mod1[,colnames(batch.rem3_mod1) %in% rownames(meta[meta$COiMg %in% "yes" & meta$condition %in% "IFNg",])]
cor(ctr_CO,ctr_COiMG)## M13 M14 M15 M21 M27 M33 F9
## M16 0.9888853 0.9750494 0.9854099 0.9860980 0.9844882 0.9665080 0.9772695
## M17 0.9882932 0.9689689 0.9821487 0.9803589 0.9830655 0.9582293 0.9703453
## M18 0.9853977 0.9796923 0.9773084 0.9801409 0.9848437 0.9628487 0.9773172
## M24 0.9893057 0.9799679 0.9870001 0.9855227 0.9831987 0.9708646 0.9793361
## M30 0.9839439 0.9849775 0.9782459 0.9812581 0.9760353 0.9881220 0.9857833
## M36 0.9808666 0.9739084 0.9819043 0.9848451 0.9726188 0.9723869 0.9735683
## F15 0.9896431 0.9848594 0.9858859 0.9880105 0.9875725 0.9839841 0.9898616
## F16 0.9736695 0.9781072 0.9730415 0.9772165 0.9707696 0.9863550 0.9842931
## F17 0.9837642 0.9760274 0.9815035 0.9841363 0.9823853 0.9590486 0.9744931
## F10
## M16 0.9820133
## M17 0.9788688
## M18 0.9814824
## M24 0.9853527
## M30 0.9823925
## M36 0.9794674
## F15 0.9870064
## F16 0.9753547
## F17 0.9794916
df1 <- data.frame('CO'=rowMeans(ctr_CO))
df2 <- data.frame('COiMG'=rowMeans(ctr_COiMG))
df.plot <- cbind(df1,df2)
quantile(as.matrix(df1-df2))## 0% 25% 50% 75% 100%
## -5.52300947 -0.16864063 0.01132637 0.16271150 1.86109744
rownames(df.plot)[df1-df2 >= 1 | df1-df2 <= -4]## [1] "DLX6" "NOS2" "TFAP2B" "MGST1" "PAX7" "KITLG" "ASIC4"
## [8] "NAALAD2" "MOXD1" "LRP2" "SLC32A1" "TOX3" "WNT3" "FZD10"
## [15] "SPP1" "ONECUT2" "PAX3" "GAD2" "ASCL1" "ZFHX3" "GREB1L"
## [22] "C1QL2" "DLX1" "SFRP2" "TENM2" "CDH8" "POU4F1" "ZIC1"
## [29] "ROBO3" "WNT7A" "ZIC3" "H4C8" "VCAM1" "NDST3" "CRABP1"
## [36] "PTF1A" "GBX2" "SHOX2" "IRX1" "IRX2" "HSPA6" "LRRN3"
## [43] "WFIKKN2" "ZIC4" "PRIMA1" "IRX5" "OLIG3" "IRX3" "GSX2"
## [50] "FIGN" "BGN" "SOX1" "POU3F2" "PCDH18" "GREB1" "POU3F4"
## [57] "HES5" "TOX" "HSPA1B" "HSPA1A" "VGLL3" "TRIM71" "UBD"
## [64] "SP9" "OR2I1P" "H4C14" "H2BC8"
Gene <- ifelse(rownames(df.plot) %in% rownames(df.plot)[df1-df2 >= 1 | df1-df2 <= -4], rownames(df.plot), "")
ggplot(df.plot, aes(x=CO,y=COiMG))+
geom_point() +
labs(title = "Expression overlap CO vs COiMG under IFNy stimulation",
x = "Standardized CO counts",
y = "Standardized COiMG counts") +
geom_smooth(method=lm, linetype="dashed",
color="darkred", fill="blue", se=TRUE) +
geom_point(shape=18, color="grey")+
stat_cor(method = "pearson", label.x = 0, label.y = 20)+
geom_text_repel(data = df.plot, aes(x = CO, y = COiMG, label = Gene), vjust = 3, size = 3) +
theme_minimal()PCAplot(pca.ifny, "condition", Shape = "Line", PoV.df=PoV.ifny, pc.1 = 1, pc.2 = 2) #M30 & M33 are outleirs, let's remove them for nownew_IFNy$symbol <- rownames(new_IFNy)
top30.new_IFNy <- c(rownames(new_IFNy[order(new_IFNy$log2FoldChange),][new_IFNy[order(new_IFNy$log2FoldChange),]$padj < 0.05,][1:15,]),
rownames(new_IFNy[order(new_IFNy$log2FoldChange, decreasing = T),][new_IFNy[order(new_IFNy$log2FoldChange, decreasing = T),]$padj < 0.05,][1:15,]))
volcano_plot(data.frame(new_IFNy), title = 'ctr vs IFNy',
annotate_by=top30.new_IFNy, ymax1 = 60, ymax2 = 61, xmax1 = -5, xmax2 = 10)for (i in unique(meta.CO$Line)){
boxplot.g_ifny_byline <- boxplot.g_ifny[boxplot.g_ifny$Sample %in% rownames(meta.CO[!meta.CO$condition %in% 'LPS',][meta.CO[!meta.CO$condition %in% 'LPS',]$Line %in% i,]),]
expression_data_long.ifny_byline <- gather(boxplot.g_ifny_byline, key = "Gene", value = "Expression", -Sample, -Stimulation)
p <- ggplot(expression_data_long.ifny_byline,aes_string(x = 'Gene', y = 'Expression', fill = 'Stimulation')) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.7) +
labs(y=paste('Expression in',i)) +
theme_minimal()
plot(p)
}ComplexHeatmap::Heatmap(hm_counts.ifny,
cluster_rows = T,
cluster_columns = T,
show_row_dend = T,
show_column_dend = T,
show_row_names = F,
show_column_names = F,
left_annotation = ra.ifny,
bottom_annotation = HeatmapAnnotation(
text = anno_text(colnames(hm_counts.ifny), rot = 300, just = "left",gp=gpar(fontsize=8))),
col = colorRampPalette(rev(RColorBrewer::brewer.pal(9,"RdBu")))(10),
name = "Z-score expression")ComplexHeatmap::Heatmap(DEG.enrich_ifny,
cluster_rows = F,
cluster_columns = F,
show_row_dend = F,
show_column_dend = F,
show_row_names = T,
col = colorRampPalette(RColorBrewer::brewer.pal(9,"Blues"))(30),
name = "log2(q)",
)dotplot(new_IFNy.upregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")dotplot(new_IFNy.downregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")#==========iMG - COiMG correlation===============+#
#integrate iMG with COiMG
load('/Users/kubler01/Documents/R projects/bulk-org/data/Psamples_gene_matrix.RData')
genes_countsmeta_p <- data.frame(readxl::read_xlsx('/Users/kubler01/Documents/R projects/bulk-org/meta_P.xlsx'))
counts_p <- genes_counts[,colnames(genes_counts) %in% meta_p$ID]
counts_P.filt <- counts_p[rownames(counts_p) %in% counts_protcod2,]
rownames(counts_P.filt) <- make.unique(protcod_genes_reduced[,2])
#filter genes on expression
gene_names <- rownames(counts_P.filt)
cpm = edgeR::cpm(counts_P.filt)
median_genes_cpm <- enframe(rowMedians(as.matrix(counts_P.filt)), name = "Gene", value = "median_cpm")
median_genes_cpm <- cbind(median_genes_cpm, gene_names)
keep.exp <- dplyr::filter(median_genes_cpm, median_cpm > 1)
keep.exp <- keep.exp$gene_names
counts_P.filt2 <- counts_P.filt[keep.exp,]
dds.P <- DESeqDataSetFromMatrix(countData = round(counts_P.filt2),
colData = meta_p,
design = ~ RIN + `X260.280` + `X260.230` + type) # RIN + Line + Age + `260/230` + `260/280` +
vsd.p <- vst(dds.P)
transformed.p <- data.frame(assay(vsd.p), check.names=F)
batch.p <- removeBatchEffect(transformed.p,
covariates=as.matrix(cbind(
meta_p$`X260.280`,
meta_p$`X260.230`,
meta_p$RIN)),
design=model.matrix(~ meta_p$type))
iMG <- batch.p[,colnames(batch.p) %in% meta_p[meta_p$type %in% 'iMG',]$ID]
ocMG <- batch.p[,colnames(batch.p) %in% meta_p[meta_p$type %in% 'ocMG',]$ID]
cor(iMG,ocMG)## P12 P13
## P10 1.0000000 0.9999866
## P11 1.0000000 0.9999866
## P9 0.9999866 1.0000000
df1 <- data.frame('iMG'=rowMeans(iMG))
df2 <- data.frame('ocMG'=rowMeans(ocMG))
df.plot <- cbind(df1,df2)
quantile(as.matrix(df1-df2))## 0% 25% 50% 75% 100%
## -1.517314803 -0.060528021 0.007527633 0.078884006 1.029107876
rownames(df.plot)[df1-df2 >= 0.5 | df1-df2 <= -1]## [1] "SCIN" "NR1H3" "APBA2" "CDH1" "RAI14" "ELN"
## [7] "FAR2" "NAV3" "HES2" "NUAK1" "FSCN1" "SLC1A3"
## [13] "EPB41L2" "OAS1" "ITGA6" "BLNK" "PLTP" "MXRA5"
## [19] "PTGDS" "ADGRD1" "KIF20A" "STEAP3" "OTOF" "CD207"
## [25] "GBP1" "SPP1" "LTBP2" "KCNJ5" "KIAA1217" "KCNJ2"
## [31] "AHNAK" "MCF2L" "CFP" "SLC44A2" "APOC1" "COL5A1"
## [37] "PPARG" "CLEC10A" "RIN2" "ACY3" "CHIT1" "NTS"
## [43] "ETS1" "ADAM19" "ITGA7" "DYSF" "GYPC" "ANGPTL2"
## [49] "IRF4" "IFI44L" "CHRNA1" "RGS16" "GRIP2" "PTPRG"
## [55] "MKI67" "HS3ST3A1" "MYO1E" "JAML" "PLPP3" "KIF26B"
## [61] "FYCO1" "MELTF" "IFI27" "HTRA1" "CX3CR1" "SOCS6"
## [67] "SH3RF3" "HSPB7" "TP53I11" "LRRN1" "ALS2CL" "PCED1B"
## [73] "TSPAN10" "CADM1" "KCNQ3" "INKA1" "POU3F1" "GLDN"
## [79] "RYR1" "GAL3ST4" "PDGFA" "ADGRG1" "LILRA4" "SIGLEC12"
Gene <- ifelse(rownames(df.plot) %in% rownames(df.plot)[df1-df2 >= 0.5 | df1-df2 <= -1], rownames(df.plot), "")
ggplot(df.plot, aes(x=iMG,y=ocMG))+
geom_point() +
labs(title = "Expression overlap iMG and ocMG",
x = "Standardized iMG counts",
y = "Standardized ocMG counts") +
geom_smooth(method=lm, linetype="dashed",
color="darkred", fill="blue", se=TRUE) +
geom_point(shape=18, color="grey")+
stat_cor(method = "pearson", label.x = 0, label.y = 20)+
geom_text_repel(data = df.plot, aes(x = iMG, y = ocMG, label = Gene), vjust = 3, size = 3) +
theme_minimal()